package us.codecraft.webmagic.main;

import com.jfinal.plugin.activerecord.ActiveRecordPlugin;
import com.jfinal.plugin.druid.DruidPlugin;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * WebMagic {@link PageProcessor} that crawls the Shanghai second-hand housing
 * listings of sh.lianjia.com.
 *
 * <p>List pages (matching {@link #URL_LIST}) are scanned for links to detail pages;
 * every discovered detail URL is queued for crawling and stored as a
 * {@code LianjiaIdList} record. Every other page is treated as a detail page: its
 * fields are extracted and stored as a {@code LianjiaId} record, provided a title
 * could be extracted.
 */
public class LianjiaSh implements PageProcessor {

    /** Regex of a list page URL, e.g. {@code http://sh.lianjia.com/ershoufang/d100s7}. */
    public static final String URL_LIST = "http://sh\\.lianjia\\.com/ershoufang/d\\d+s7";

    /** Regex of a detail page path, e.g. {@code /ershoufang/sh4766450.html}. */
    public static final String URL_POST = "/ershoufang/sh\\d+\\.html";

    /** Scheme and host prepended to the detail page paths extracted with {@link #URL_POST}. */
    private static final String BASE_URL = "http://sh.lianjia.com";

    /** Index of the last list page that is queued; list pages are numbered from 1. */
    private static final int LIST_PAGE_COUNT = 2471;

    /** XPath prefix shared by the rows of the "main info" list on a detail page. */
    private static final String MAIN_INFO = "//ul[@class='maininfo-main']";

    /** CSS prefix of the second span inside a row of the "minor info" list on a detail page. */
    private static final String MINOR_INFO = ".maininfo-minor li";

    private final Site site = Site
            .me()
            .setDomain("sh.lianjia.com")
            .setSleepTime(3000)
            .setUserAgent(
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");

    /** Set once the full range of list pages has been handed to the crawler. */
    private final AtomicBoolean listPagesSeeded = new AtomicBoolean(false);

    /**
     * Handles one downloaded page: queues the list pages (first call only), then
     * dispatches to list-page or detail-page handling depending on the page URL.
     *
     * @param page the downloaded page
     */
    @Override
    public void process(Page page) {
        seedListPagesOnce(page);

        if (page.getUrl().regex(URL_LIST).match()) {
            processListPage(page);
        } else {
            processDetailPage(page);
        }
    }

    /** @return the crawl settings (domain, sleep time, user agent) used by this processor */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Queues list pages 1..{@link #LIST_PAGE_COUNT} the first time it is called and
     * does nothing afterwards. Previously the same URLs were rebuilt and re-submitted
     * for every processed page.
     */
    private void seedListPagesOnce(Page page) {
        // compareAndSet guarantees that only one crawler thread performs the seeding.
        if (!listPagesSeeded.compareAndSet(false, true)) {
            return;
        }
        List<String> pageLinks = new ArrayList<String>(LIST_PAGE_COUNT);
        for (int i = 1; i <= LIST_PAGE_COUNT; i++) {
            pageLinks.add(listPageUrl(i));
        }
        page.addTargetRequests(pageLinks);
    }

    /** Builds the URL of the list page with the given 1-based index. */
    private static String listPageUrl(int index) {
        return BASE_URL + "/ershoufang/d" + index + "s7";
    }

    /** Queues every detail page linked from a list page and records its URL. */
    private void processListPage(Page page) {
        List<String> postPaths = page.getHtml()
                .xpath("//div[@class='prop-title']")
                .links()
                .regex(URL_POST)
                .all();
        for (String postPath : postPaths) {
            // Validate the extracted path BEFORE prefixing the host; once concatenated
            // the string can never be null or empty, so a later check would be vacuous.
            if (postPath == null || postPath.isEmpty()) {
                continue;
            }
            String targetUrl = BASE_URL + postPath;
            page.addTargetRequest(targetUrl);
            new LianjiaIdList()
                    .set("target_url", targetUrl)
                    .save();
        }
    }

    /**
     * Extracts the listing fields from a detail page and stores them. Pages from
     * which no title can be extracted are skipped without touching the database.
     */
    private void processDetailPage(Page page) {
        String titleText = xpathText(page, "//h1[@class='header-title']/text()");
        if (titleText == null || titleText.isEmpty()) {
            return;
        }

        String targetUrlText = page.getUrl().toString();
        // Total price, e.g. "1000" (unit: 10,000 CNY)
        String totalPriceText = xpathText(page, "//span[@class='price-num']/text()");
        // Unit price, e.g. "39890"
        String unitPriceText = xpathText(page, "//p[@class='price-unit-num']/span/text()");
        // Construction year, e.g. "2004年建造"
        String buildYear = xpathText(page, MAIN_INFO + "/li[3]/p[2]/text()");

        // Location links live in the third-from-last row of the minor info list:
        // 1st link = residential compound, 2nd = district, 3rd = town.
        String locationLinks = MINOR_INFO + ":nth-last-child(3) span:nth-child(2) span a";
        String village = cssText(page, locationLinks + ":nth-child(1)");
        String district = cssText(page, locationLinks + ":nth-child(2)");
        String town = cssText(page, locationLinks + ":nth-child(3)");

        // House number, e.g. "sh4458093"; it is the last row of the minor info list.
        String houseNo = trimOrNull(cssText(page, MINOR_INFO + ":last-child span:nth-child(2)"));

        // Tags are stored in List.toString() form, e.g. "[a, b]".
        String tags = page.getHtml()
                .xpath("//div[@class='content-main module-tb']/div[@class='module-row'][3]/div[@class='module-col baseinfo-colspan2']/ul/li/span/text()")
                .all()
                .toString();

        new LianjiaId()
                .set("target_url", targetUrlText)
                .set("title", titleText)
                .set("house_no", houseNo)
                .set("city", "sh")
                .set("total_price", totalPriceText)
                .set("unit_price", unitPriceText)
                .set("district", district)
                .set("town", town)
                .set("village", village)
                .set("build_year", buildYear)
                .set("tags", tags)
                .save();
    }

    /** Returns the first XPath match on the page as text; may be {@code null} if nothing matches. */
    private static String xpathText(Page page, String xpath) {
        return page.getHtml().xpath(xpath).toString();
    }

    /** Returns the text of the first CSS match on the page; may be {@code null} if nothing matches. */
    private static String cssText(Page page, String selector) {
        return page.getHtml().$(selector, "text").toString();
    }

    /** Null-safe {@link String#trim()}: a missing value stays {@code null} instead of throwing. */
    private static String trimOrNull(String value) {
        return value == null ? null : value.trim();
    }

    /**
     * Starts the database plugins, registers the model mappings and runs the crawl
     * with 5 threads, beginning at the first list page.
     *
     * <p>The database connection can be overridden with the system properties
     * {@code lianjia.db.url}, {@code lianjia.db.user} and {@code lianjia.db.password};
     * when they are absent the previous built-in values are used.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // NOTE(review): the fallback credentials are kept only for backward
        // compatibility; prefer supplying them via -Dlianjia.db.* at launch.
        DruidPlugin druidPlugin = new DruidPlugin(
                System.getProperty("lianjia.db.url", "jdbc:mysql://127.0.0.1/sblog?characterEncoding=utf-8"),
                System.getProperty("lianjia.db.user", "root"),
                System.getProperty("lianjia.db.password", "1qaz2wsx"));
        druidPlugin.start();

        ActiveRecordPlugin activeRecordPlugin = new ActiveRecordPlugin(druidPlugin);
        activeRecordPlugin.addMapping("t_sh_second_sale", LianjiaId.class);
        activeRecordPlugin.addMapping("t_sh_second_sale_list", LianjiaIdList.class);
        activeRecordPlugin.start();

        Spider.create(new LianjiaSh()).addUrl(listPageUrl(1)).thread(5).run();
    }
}
